PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\price_changes_microwave_July2025.csv"
OUT=price_changes_sellertype
DBMS=csv REPLACE;
run;
PROC IMPORT 
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\Sellerclusters_microwave_July2025.csv"
out = SELLER_TYPE_TREND1
DBMS=csv REPLACE;
run;
PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\pricechangetrend_microwave_July2025.csv" 
out=final_outputvector
DBMS=csv REPLACE;
run;

proc contents data = SELLER_TYPE_TREND1;
run;


/***subsetting for the relevant analysis SKUs***/
/*B004VEP2NQ*/
/*B01M7RABCO*/
/*B00KNUHBJU*/
/*B01DEWZWDU*/
/*B01DEWZWGC*/
data relevantSKUsonly;
set price_changes_sellertype;
where ASIN in ('B00KNUHBJU', 'B004VEP2NQ', 'B01DEWZWDU','B01M7RABCO', 'B01DEWZWGC') ; 
run;

data relevantSKUs;
set price_changes_sellertype;
where ASIN in ('B00KNUHBJU', 'B004VEP2NQ', 'B01DEWZWDU','B01M7RABCO', 'B01DEWZWGC') and seller_type in ('Amazon New', 'Cluster 1', 'Cluster 2',
'Cluster 3', 'Cluster 4'); 
run;

data relevantSKUs_used;
set price_changes_sellertype;
where ASIN in ('B00KNUHBJU', 'B004VEP2NQ', 'B01DEWZWDU','B01M7RABCO', 'B01DEWZWGC') and seller_type not in ('Amazon New', 'Cluster 1', 'Cluster 2',
'Cluster 3', 'Cluster 4'); 
run;



PROC SQL;
CREATE TABLE numsellers_newoffers AS
SELECT
TIME1,
ASIN,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUs
group by
time1,
ASIN;
quit;

PROC SQL;
CREATE TABLE numsellersbyeachtype_newoffers AS
SELECT
TIME1,
ASIN,
seller_type,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUsonly
group by
time1,
ASIN,
seller_type;
quit;

/**creating num sellers DV**/
proc sort data = numsellers_newoffers (keep=time1) nodupkey out= final_numsellers_outputvector;
by time1;
run;

%macro numsellers(b,c);
data numsellers&b. (keep = time1 NUM_SELLERS_&b.);
set numsellers_newoffers;
where ASIN = &c.;
rename NUM_SELLERS = NUM_SELLERS_&b. ;
run;

data final_numsellers_outputvector ;
merge final_numsellers_outputvector (in=a) numsellers&b. (in=b);
by time1;
if a ;
if a and not b then NUM_SELLERS_&b. = 0;
run;
%mend;
%numsellers(b = PSN1, c = 'B00KNUHBJU'); 
%numsellers(b = LG, c = 'B004VEP2NQ'); 
/*%numsellers(b = FGD, c = 'B015TBCZ4I'); */
%numsellers(b = PSN2, c = 'B01DEWZWDU'); 
%numsellers(b = KNM, c = 'B01M7RABCO'); 
%numsellers(b = PSN3, c = 'B01DEWZWGC'); 


/*PROC EXPORT */
/*DATA=final_numsellers_outputvector*/
/*DBMS=csv*/
/*OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\numsellers_dv2outcomemwave.csv"*/
/*REPLACE;*/
/*run;*/
PROC EXPORT 
DATA=final_numsellers_outputvector
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\numsellers_dv2outcomemwave_July2025.csv"
REPLACE;
run;
proc print data = final_numsellers_outputvector (obs=20);
run;

/***covariate creation**/

proc sort data = SELLER_TYPE_TREND1 out = SELLER_TYPE_TREND11;
by time1;
run;
proc print data = SELLER_TYPE_TREND11 (obs=20);
run;

data SELLER_TYPE_TREND2;
set SELLER_TYPE_TREND11;
if seller_type in ('3P Old Focal', '3P Old Non-Focal') then seller_type = '3P Old';
else if seller_type in ('Amazon Old Focal', 'Amazon Old Non-Focal') then seller_type = 'Amazon Old';
else seller_type = seller_type;
run;

/**seller characteristics trend***/
proc freq data = SELLER_TYPE_TREND2;
tables seller_type;
run;


/*subset for 3p new sellers*/
data SELLER_TYPE_TREND3;
set SELLER_TYPE_TREND2;
where seller_type not in ('Amazon Ne', '3P Old', 'Amazon Ol');
run;
proc contents data = SELLER_TYPE_TREND3;
run;

PROC SQL;
CREATE TABLE SELLER_charcs_byasin AS
SELECT
TIME1,
ASIN,
SELLER_TYPE,
mean(FBA) as mean_FBA,
/*std(FBA) as sd_FBA,*/
mean(Freeship) as mean_freeship,
/*std(Freeship) as sd_freeship,*/
mean(Prime) as mean_prime,
/*std(Prime) as sd_prime,*/
sum(condition) as new_offers,
mean(num_seller_Posrating) as mean_posrating,
/*std(num_seller_Posrating) as sd_posrating,*/
mean(num_seller_ratings) as mean_numratings,
/*std(num_seller_ratings) as sd_numratings,*/
mean(Seller_Rating) as mean_starrate
/*std(Seller_Rating) as sd_starrate*/
from 
SELLER_TYPE_TREND3
group by
time1,
ASIN,
seller_type;
quit;

/*for std= . recoding those to 0*/
/*data SELLER_charcs_byasin1;*/
/*set SELLER_charcs_byasin;*/
/*if sd_FBA = . then sd_FBA = 0;*/
/*if sd_freeship = . then sd_freeship = 0;*/
/*if sd_prime = . then sd_prime = 0;*/
/*if sd_posrating = . then sd_posrating =0;*/
/*if sd_numratings = . then sd_numratings =0;*/
/*if sd_starrate = . then sd_starrate =0;*/
/*run;*/


/**converting the data by time, one row per date**/

proc contents data = SELLER_charcs_byasin1;
run;

proc sort data = SELLER_charcs_byasin (keep=time1) nodupkey out= SELLER_charcs_byasin2;
by time1;
run;


%macro sellerchar(b,c);
data SELLER_charcs_byasin&b. (keep = time1 mean_FBA&b. 
mean_freeship&b. mean_numratings&b.   
mean_posrating&b. 
mean_prime&b.   
mean_starrate&b.
/*sd_FBA&b.*/
/*sd_freeship&b. */
/*sd_numratings&b.    */
/*sd_posrating&b.    */
/*sd_prime&b.  */
/*sd_starrate&b.*/
new_offers&b.);
set SELLER_charcs_byasin;
where ASIN = &c. ;
rename mean_FBA = mean_FBA&b.;
rename mean_freeship = mean_freeship&b.;  
rename mean_numratings = mean_numratings&b.;    
rename mean_posrating =  mean_posrating&b.;    
rename mean_prime =  mean_prime&b.;    
rename mean_starrate = mean_starrate&b.;
/*rename sd_FBA = sd_FBA&b.;*/
/*rename sd_freeship =  sd_freeship&b.;  */
/*rename sd_numratings = sd_numratings&b.;     */
/*rename sd_posrating = sd_posrating&b.;     */
/*rename sd_prime =  sd_prime&b.;   */
/*rename sd_starrate = sd_starrate&b.;*/
rename new_offers = new_offers&b.;
run;

data SELLER_charcs_byasin2 ;
merge SELLER_charcs_byasin2 (in=a) SELLER_charcs_byasin&b. (in=b);
by time1;
if a ;
if a and not b then do;
mean_FBA&b. = 0;
mean_freeship&b. = 0;
mean_posrating&b. = 0;
mean_prime&b. =0;
mean_starrate&b. = 0;
/*sd_FBA&b.=0;*/
/*sd_freeship&b.=0;*/
/*sd_numratings&b.=0;*/
/*sd_posrating&b.=0;*/
/*sd_prime&b.=0;*/
/*sd_starrate&b.=0;*/
new_offers&b.=0;
end;
run;
%mend;
%sellerchar(b = PSN1_3p, c = 'B00KNUHBJU'); 
%sellerchar(b = LG_3p, c = 'B004VEP2NQ'); 
%sellerchar(b = PSN2_3p, c = 'B01DEWZWDU'); 
%sellerchar(b = KNM_3p, c = 'B01M7RABCO'); 
%sellerchar(b = PSN3_3p, c = 'B01DEWZWGC');
proc print data = SELLER_charcs_byasin2 (obs=10);
run;

/**coding all missings with 0 values**/

proc contents data = SELLER_charcs_byasin2;
run;

data seller_characs_byasin3;
set SELLER_charcs_byasin2;

if	mean_FBAKNM_3p	=.	then	mean_FBAKNM_3p	= 0;
if	mean_FBALG_3p	=.	then	mean_FBALG_3p	= 0;
if	mean_FBAPSN1_3p	=.	then	mean_FBAPSN1_3p	= 0;
if	mean_FBAPSN2_3p	=.	then	mean_FBAPSN2_3p	= 0;
if	mean_FBAPSN3_3p	=.	then	mean_FBAPSN3_3p	= 0;
if	mean_freeshipKNM_3p	=.	then	mean_freeshipKNM_3p	= 0;
if	mean_freeshipLG_3p	=.	then	mean_freeshipLG_3p	= 0;
if	mean_freeshipPSN1_3p	=.	then	mean_freeshipPSN1_3p	= 0;
if	mean_freeshipPSN2_3p	=.	then	mean_freeshipPSN2_3p	= 0;
if	mean_freeshipPSN3_3p	=.	then	mean_freeshipPSN3_3p	= 0;
if	mean_numratingsKNM_3p	=.	then	mean_numratingsKNM_3p	= 0;
if	mean_numratingsLG_3p	=.	then	mean_numratingsLG_3p	= 0;
if	mean_numratingsPSN1_3p	=.	then	mean_numratingsPSN1_3p	= 0;
if	mean_numratingsPSN2_3p	=.	then	mean_numratingsPSN2_3p	= 0;
if	mean_numratingsPSN3_3p	=.	then	mean_numratingsPSN3_3p	= 0;
if	mean_posratingKNM_3p	=.	then	mean_posratingKNM_3p	= 0;
if	mean_posratingLG_3p	=.	then	mean_posratingLG_3p	= 0;
if	mean_posratingPSN1_3p	=.	then	mean_posratingPSN1_3p	= 0;
if	mean_posratingPSN2_3p	=.	then	mean_posratingPSN2_3p	= 0;
if	mean_posratingPSN3_3p	=.	then	mean_posratingPSN3_3p	= 0;
if	mean_primeKNM_3p	=.	then	mean_primeKNM_3p	= 0;
if	mean_primeLG_3p	=.	then	mean_primeLG_3p	= 0;
if	mean_primePSN1_3p	=.	then	mean_primePSN1_3p	= 0;
if	mean_primePSN2_3p	=.	then	mean_primePSN2_3p	= 0;
if	mean_primePSN3_3p	=.	then	mean_primePSN3_3p	= 0;
if	mean_starrateKNM_3p	=.	then	mean_starrateKNM_3p	= 0;
if	mean_starrateLG_3p	=.	then	mean_starrateLG_3p	= 0;
if	mean_starratePSN1_3p	=.	then	mean_starratePSN1_3p	= 0;
if	mean_starratePSN2_3p	=.	then	mean_starratePSN2_3p	= 0;
if	mean_starratePSN3_3p	=.	then	mean_starratePSN3_3p	= 0;
if	new_offersKNM_3p	=.	then	new_offersKNM_3p	= 0;
if	new_offersLG_3p	=.	then	new_offersLG_3p	= 0;
if	new_offersPSN1_3p	=.	then	new_offersPSN1_3p	= 0;
if	new_offersPSN2_3p	=.	then	new_offersPSN2_3p	= 0;
if	new_offersPSN3_3p	=.	then	new_offersPSN3_3p	= 0;
run;



proc sort data = seller_characs_byasin3 nodupkey out=seller_characs_byasin4 ;
by  TIME1 ;
run;

proc expand data=seller_characs_byasin4 out=seller_characs_byasin5 method = none; 
convert	mean_FBAKNM_3p	=	mean_FBAKNM_3p_lag	/transformout = (lag 1);
convert	mean_FBALG_3p	=	mean_FBALG_3p_lag	/transformout = (lag 1);
convert	mean_FBAPSN1_3p	=	mean_FBAPSN1_3p_lag	/transformout = (lag 1);
convert	mean_FBAPSN2_3p	=	mean_FBAPSN2_3p_lag	/transformout = (lag 1);
convert	mean_FBAPSN3_3p	=	mean_FBAPSN3_3p_lag	/transformout = (lag 1);
convert	mean_freeshipKNM_3p	=	mean_freeshipKNM_3p_lag	/transformout = (lag 1);
convert	mean_freeshipLG_3p	=	mean_freeshipLG_3p_lag	/transformout = (lag 1);
convert	mean_freeshipPSN1_3p	=	mean_freeshipPSN1_3p_lag	/transformout = (lag 1);
convert	mean_freeshipPSN2_3p	=	mean_freeshipPSN2_3p_lag	/transformout = (lag 1);
convert	mean_freeshipPSN3_3p	=	mean_freeshipPSN3_3p_lag	/transformout = (lag 1);
convert	mean_numratingsKNM_3p	=	mean_numratingsKNM_3p_lag	/transformout = (lag 1);
convert	mean_numratingsLG_3p	=	mean_numratingsLG_3p_lag	/transformout = (lag 1);
convert	mean_numratingsPSN1_3p	=	mean_numratingsPSN1_3p_lag	/transformout = (lag 1);
convert	mean_numratingsPSN2_3p	=	mean_numratingsPSN2_3p_lag	/transformout = (lag 1);
convert	mean_numratingsPSN3_3p	=	mean_numratingsPSN3_3p_lag	/transformout = (lag 1);
convert	mean_posratingKNM_3p	=	mean_posratingKNM_3p_lag	/transformout = (lag 1);
convert	mean_posratingLG_3p	=	mean_posratingLG_3p_lag	/transformout = (lag 1);
convert	mean_posratingPSN1_3p	=	mean_posratingPSN1_3p_lag	/transformout = (lag 1);
convert	mean_posratingPSN2_3p	=	mean_posratingPSN2_3p_lag	/transformout = (lag 1);
convert	mean_posratingPSN3_3p	=	mean_posratingPSN3_3p_lag	/transformout = (lag 1);
convert	mean_primeKNM_3p	=	mean_primeKNM_3p_lag	/transformout = (lag 1);
convert	mean_primeLG_3p	=	mean_primeLG_3p_lag	/transformout = (lag 1);
convert	mean_primePSN1_3p	=	mean_primePSN1_3p_lag	/transformout = (lag 1);
convert	mean_primePSN2_3p	=	mean_primePSN2_3p_lag	/transformout = (lag 1);
convert	mean_primePSN3_3p	=	mean_primePSN3_3p_lag	/transformout = (lag 1);
convert	mean_starrateKNM_3p	=	mean_starrateKNM_3p_lag	/transformout = (lag 1);
convert	mean_starrateLG_3p	=	mean_starrateLG_3p_lag	/transformout = (lag 1);
convert	mean_starratePSN1_3p	=	mean_starratePSN1_3p_lag	/transformout = (lag 1);
convert	mean_starratePSN2_3p	=	mean_starratePSN2_3p_lag	/transformout = (lag 1);
convert	mean_starratePSN3_3p	=	mean_starratePSN3_3p_lag	/transformout = (lag 1);
convert	new_offersKNM_3p	=	new_offersKNM_3p_lag	/transformout = (lag 1);
convert	new_offersLG_3p	=	new_offersLG_3p_lag	/transformout = (lag 1);
convert	new_offersPSN1_3p	=	new_offersPSN1_3p_lag	/transformout = (lag 1);
convert	new_offersPSN2_3p	=	new_offersPSN2_3p_lag	/transformout = (lag 1);
convert	new_offersPSN3_3p	=	new_offersPSN3_3p_lag	/transformout = (lag 1);

run;

/*****REMAINING COVARIATES ARE FROM THE ORIGINAL DV - FINAL DATA SET*****/

/*PROC IMPORT*/
/*DATAFILE="D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Microwaves\final_gam_modeldata.csv" */
/*out=final_GAMMODELDATA*/
/*DBMS=csv REPLACE;*/
/*run;*/
PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Microwaves\final_gam_modeldata_Rnd4.csv" 
out=final_gam_modeldata
DBMS=csv REPLACE;
run;
proc print data = final_gam_modeldata (obs=20);
run;

proc sort data = final_numsellers_outputvector out = sorted_finalnumsellersoutput;
by time1;
run;

proc sort data = final_gam_modeldata out = sorted_final_GAMMODELDATA;
by time1;
run;


proc sort data = seller_characs_byasin5 out = sorted_seller_characs_byasin5;
by time1;
run;


data finalnumsellers_modeldata;
merge sorted_finalnumsellersoutput (in=a) sorted_final_GAMMODELDATA (in=b) sorted_seller_characs_byasin5 (in=c);
by time1;
if a ;
numsellers_PSN = NUM_SELLERS_PSN1 + NUM_SELLERS_PSN2 + NUM_SELLERS_PSN3;
run;
proc print data = finalnumsellers_modeldata (obs=20);
run;



/**Adding special Amazon holidays***/

/*Valentine's day sale: Feb 5th - Feb 14th 

Easter day sale: March 19-March 26 (2018)

Mothers day: May 13 (2018)

Amazon prime day: JUly 16 (2018); post-prime day sale July 18 (2018)

Black Friday:
    Nov 17th - Nov 24th (2017)
    Nov 16th - Nov 23rd (2018)

Cyber Monday:
	Nov 27th (2017), Nov 26th (2018)

Amazon digital day: Dec 29 (2017), Dec 28 (2018)

12 days of deal sale: Dec 3rd - Dec 14 (2017), Dec 2nd - Dec 13 (2018)

Christmas day sale: Dec 20-Dec25

Hanukkah sale: Dec 1- Dec 7

Post Christmas sale: Dec 26- Jan 1st*/

data finalnumsellers_modeldata1;
set finalnumsellers_modeldata;
if time1 in ( '01Jan2018'd , 
              '05Feb2018'd, '06Feb2018'd,'07Feb2018'd,'08Feb2018'd,'09Feb2018'd,'10Feb2018'd,'11Feb2018'd,
              '12Feb2018'd, '13Feb2018'd,'14Feb2018'd,
			  '19Mar2018'd, '20Mar2018'd,'21Mar2018'd,'22Mar2018'd,'23Mar2018'd,'24Mar2018'd,'25Mar2018'd,'26Mar2018'd,
			  '13May2018'd, '16Jul2018'd, '18Jul2018'd, 
              '17Nov2017'd,'18Nov2017'd,'19Nov2017'd,'20Nov2017'd,'21Nov2017'd,'22Nov2017'd, '23Nov2017'd,'24Nov2017'd,'27Nov2017'd,
			  '16Nov2018'd,'17Nov2018'd,'18Nov2018'd,'19Nov2018'd,'20Nov2018'd,'21Nov2018'd,'22Nov2018'd, '23Nov2018'd,'26Nov2018'd,
              '03Dec2017'd,'04Dec2017'd,'05Dec2017'd,'06Dec2017'd,'07Dec2017'd,'08Dec2017'd,
              '09Dec2017'd,'10Dec2017'd,'11Dec2017'd,'12Dec2017'd,'13Dec2017'd,'14Dec2017'd,
			  '02Dec2018'd,'03Dec2018'd,'04Dec2018'd,'05Dec2018'd,'06Dec2018'd,'07Dec2018'd,'08Dec2018'd,
              '09Dec2018'd,'10Dec2018'd,'11Dec2018'd,'12Dec2018'd,'13Dec2018'd) then seasonal_sale = 1;
else if time1 >= '20Dec2017'd and time1 <= '31Dec2017'd then seasonal_sale = 1;
else if time1 >= '20Dec2018'd and time1 <= '31Dec2018'd then seasonal_sale = 1;
else seasonal_sale = 0;
run;

proc freq data = finalnumsellers_modeldata1;
tables season*seasonal_sale /list;
run;
proc print data = finalnumsellers_modeldata1 (obs=20);
run;

PROC EXPORT 
DATA=finalnumsellers_modeldata1
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Microwaves\finalnumsellers_modeldata_JmRnd4.csv"
REPLACE;
run;

